#### Main findings  ####
# One regression formula per election type, assembled from character pieces
# (dependent variable, name-fluency regressors, controls) defined outside
# this section.
form_ced_main <- formula(
  paste0(dv_ced, " ~ ", iv_all_names, " + ", ced_controls)
)
form_primary_main <- formula(
  paste0(dv_prim, " ~ ", iv_all_names, " + ", primary_controls)
)
form_general_main <- formula(
  paste0(dv_gen, " ~ ", iv_all_names, " + ", general_controls)
)

# For every election type: fit OLS, store get_r2() of the lm fit, and only
# then replace the model object with the result of get_clusters(). The order
# matters because get_r2() is called on the original lm object.

# CA local elections
mod_all_ced <- lm(form_ced_main, data = ced)
cedr2 <- get_r2(mod_all_ced)
mod_all_ced <- get_clusters(mod_all_ced)

# Primary congressional elections
mod_all_primary <- lm(form_primary_main, data = primary)
primaryr2 <- get_r2(mod_all_primary)
mod_all_primary <- get_clusters(mod_all_primary)

# General congressional elections
mod_all_general <- lm(form_general_main, data = general)
generalr2 <- get_r2(mod_all_general)
mod_all_general <- get_clusters(mod_all_general)


# Main regression table. Columns are ordered general, primary, local; the
# hand-written add.lines rows follow that same column order.
stargazer(
  mod_all_general, mod_all_primary, mod_all_ced,
  type = "latex",
  title = "Relationship Between Name Fluency and Vote Share",
  style = style,
  star.cutoffs = star.cutoffs,
  star.char = star.char,
  column.labels = column.labels,
  keep.stat = keep.stat,
  notes = notes,
  keep = keep,
  covariate.labels = covariate.labels,
  font.size = "footnotesize",
  model.names = FALSE,
  add.lines = list(
    c("Year FE", "\\checkmark", "\\checkmark", "\\checkmark"),
    c("State FE", "\\checkmark", "\\checkmark", ""),
    c("County FE", "", "", "\\checkmark"),
    c("N",
      get_n(mod_all_general),
      get_n(mod_all_primary),
      get_n(mod_all_ced)),
    c("Adj. R-squared", generalr2, primaryr2, cedr2)
  ),
  out = "tables/reg-main.tex",
  label = "tab:reg-main"
)


#### Main findings plot ####
plotfun <- function(data, election) {
  # Turn a coefficient table into a four-row tibble for the coefficient plot.
  #
  # data:     matrix-like object indexed as data[row, col]; column 1 is read
  #           as the estimate and column 2 as its standard error.
  # election: label stored in the `context` column of every row.
  #
  # Rows 2-5 of `data` are taken, in this order, as surname pronounceability,
  # first-name pronounceability, surname commonality and first-name
  # commonality. NOTE(review): this relies on the regressor order of the
  # fitted model -- confirm against the formula.
  #
  # Returns a tibble with columns estimate, lowbound, highbound,
  # variablename and context, where the bounds are estimate -/+ 1.96 * SE.
  coef_rows <- 2:5
  est <- vapply(coef_rows, function(i) data[i, 1], numeric(1))
  se <- vapply(coef_rows, function(i) data[i, 2], numeric(1))

  tibble(
    estimate = est,
    lowbound = est - 1.96 * se,
    highbound = est + 1.96 * se,
    variablename = c(
      "Surname Pronounceability",
      "First Name Pronounceability",
      "Surname Commonality",
      "First Name Commonality"
    ),
    context = election
  )
}

# Plot rows for each of the three main models.
local_elect <- plotfun(mod_all_ced, "Local Elections")
general_elect <- plotfun(mod_all_general, "General Elections")
primary_elect <- plotfun(mod_all_primary, "Primary Elections")

# Stack the three sets of rows, sort by election context, and add
#   concatname:  "<context>, <variable>" key,
#   estimate:    rounded to two decimals,
#   resultlabel: signed percentage text drawn next to each point.
plotdata <- rbind(general_elect, primary_elect, local_elect) %>%
  arrange(context) %>%
  mutate(
    concatname = paste(context, variablename, sep = ", "),
    estimate = round(estimate, 2),
    resultlabel = if_else(
      estimate >= 0,
      paste0("+", estimate, "%"),
      paste0(estimate, "%")
    )
  )

# concatname: levels grouped by election (local, primary, general), with the
# four variables in a fixed order inside each election.
plotdata$concatname <- factor(
  plotdata$concatname,
  levels = paste(
    rep(
      c("Local Elections", "Primary Elections", "General Elections"),
      each = 4
    ),
    c(
      "First Name Commonality",
      "Surname Commonality",
      "First Name Pronounceability",
      "Surname Pronounceability"
    ),
    sep = ", "
  )
)

# concatname2: same keys, but grouped by variable, with the three elections
# (local, primary, general) inside each variable. This is the y axis below.
plotdata$concatname2 <- factor(
  plotdata$concatname,
  levels = paste(
    c("Local Elections", "Primary Elections", "General Elections"),
    rep(
      c(
        "First Name Commonality",
        "Surname Commonality",
        "First Name Pronounceability",
        "Surname Pronounceability"
      ),
      each = 3
    ),
    sep = ", "
  )
)

plotdata$variablename <- factor(
  plotdata$variablename,
  levels = c(
    "First Name Commonality",
    "Surname Commonality",
    "First Name Pronounceability",
    "Surname Pronounceability"
  )
)

# The level order of context fixes the legend order and which grey in
# scale_color_manual() goes with which election type.
plotdata$context <- factor(
  plotdata$context,
  levels = c("General Elections", "Primary Elections", "Local Elections")
)

# Coefficient plot: point estimate, text label just above it, and a
# horizontal interval from lowbound to highbound. The plot is not assigned;
# ggsave() below picks up the most recently created plot.
ggplot(
  plotdata,
  aes(x = estimate, y = concatname2, color = context)
) +
  geom_point() +
  geom_text(
    aes(label = resultlabel),
    nudge_y = .3,
    size = 3
  ) +
  geom_errorbarh(
    aes(xmax = highbound, xmin = lowbound),
    height = 0
  ) +
  scale_color_manual(values = c("#333333", "#808080", "#CCCCCC")) +
  xlim(c(-1, 2)) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  xlab("Change in vote share per 1 SD increase in name fluency") +
  ylab("") +
  theme_classic() +
  theme(
    text = element_text(family = "Times New Roman"),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_blank(),
    legend.position = "bottom",
    axis.title.x = element_text(family = "Times New Roman")
  )

ggsave(
  "meet-journal-requirements/figures/reg-main-plot.pdf",
  width = 6,
  height = 4,
  units = "in",
  dpi = 600,
  device = cairo_pdf
)

#### Analyze across different races ####
# Nine subsets: {local, general, primary} x {white, black, hisp}. Each keeps
# the rows of the corresponding data set whose race indicator equals 1.

# Local elections (ced)
local_white <- filter(ced, white == 1)
local_black <- filter(ced, black == 1)
local_hisp <- filter(ced, hisp == 1)

# General congressional elections
general_white <- filter(general, white == 1)
general_black <- filter(general, black == 1)
general_hisp <- filter(general, hisp == 1)

# Primary congressional elections
primary_white <- filter(primary, white == 1)
primary_black <- filter(primary, black == 1)
primary_hisp <- filter(primary, hisp == 1)

# local, white
# primary, white
# general, white

# local, black
# primary, black
# general, black

# local, hisp
# primary, hisp
# general, hisp

regout_race <- function(data_input, election_type) {
  # Fit the name-fluency regression for one election type on `data_input`
  # and return its coefficient table with cluster-robust standard errors.
  #
  # data_input:    data frame holding the outcome, regressors and the
  #                cluster identifier for the chosen election type.
  # election_type: one of "local", "primary" or "general"; anything else
  #                stops with "Must specify a correct election type".
  #
  # Returns a `coeftest` object (estimate in column 1, standard error in
  # column 2). Rows 2-5 are the four scaled name-fluency regressors, in the
  # order they appear in the formulas below. The fitted lm is kept on the
  # result (save = TRUE) for all three election types; previously only the
  # local branch did so.
  #
  # Fix: the previous code passed `cluster = "<id>"` and `adjust = T` to
  # vcovHC(). For lm fits vcovHC() has no such arguments and silently drops
  # them, so the reported errors were heteroskedasticity-robust (HC0) but
  # not clustered. sandwich::vcovCL() performs the clustering that the
  # table note ("clustered by election") describes.
  if (!is.character(election_type) ||
      length(election_type) != 1L ||
      is.na(election_type)) {
    stop("Must specify a correct election type")
  }

  # Outcome, right-hand side and cluster id column for each election type.
  spec <- switch(
    election_type,
    local = list(
      formula = percent ~
        scale(last_algorithm) +
        scale(first_algorithm) +
        scale(percent_freq) +
        scale(fprop) +
        incumbent +
        female +
        lchars +
        totvotes1000 +
        office +
        seats_comps +
        factor(year) +
        factor(co_name),
      cluster = "raceid"
    ),
    primary = list(
      formula = ppct ~
        scale(last_algorithm) +
        scale(first_algorithm) +
        scale(percent_freq) +
        scale(fprop) +
        incumbent +
        female +
        lchars +
        race +
        num_prim_opps +
        factor(party) +
        factor(year),
      cluster = "dcp"
    ),
    general = list(
      formula = gpct ~
        scale(last_algorithm) +
        scale(first_algorithm) +
        scale(percent_freq) +
        scale(fprop) +
        incumbent +
        female +
        lchars +
        race +
        factor(party) +
        factor(year),
      cluster = "id"
    ),
    stop("Must specify a correct election type")
  )

  mod <- lm(spec$formula, data = data_input)

  # HC0 meat as in the original call; cadjust = TRUE applies the G / (G - 1)
  # cluster correction, which is the closest match to the old `adjust = T`.
  clustered_vcov <- sandwich::vcovCL(
    mod,
    cluster = stats::reformulate(spec$cluster),
    type = "HC0",
    cadjust = TRUE
  )

  coeftest(mod, vcov. = clustered_vcov, save = TRUE)
}

# Fit the by-race models: one per election type x race subset.
local_mod_white <- regout_race(local_white, election_type = "local")
local_mod_black <- regout_race(local_black, election_type = "local")
local_mod_hisp <- regout_race(local_hisp, election_type = "local")

primary_mod_white <- regout_race(primary_white, election_type = "primary")
primary_mod_black <- regout_race(primary_black, election_type = "primary")
primary_mod_hisp <- regout_race(primary_hisp, election_type = "primary")

# Fix: these three models were previously fitted on the primary_* subsets
# (copy/paste slip), so the "general" columns never used the general
# election data. They now use the general_* subsets created above.
general_mod_white <- regout_race(general_white, election_type = "general")
general_mod_black <- regout_race(general_black, election_type = "general")
general_mod_hisp <- regout_race(general_hisp, election_type = "general")


# Table column order: (general, primary, local) within white, black, hispanic.
# column.labels and the add.lines rows below follow this same order.
m1 <- general_mod_white
m2 <- primary_mod_white
m3 <- local_mod_white
m4 <- general_mod_black
m5 <- primary_mod_black
m6 <- local_mod_black
m7 <- general_mod_hisp
m8 <- primary_mod_hisp
m9 <- local_mod_hisp

# NOTE(review): the models passed here are coeftest objects, so confirm that
# keep.stat = c("n", "adj.rsq") actually prints anything in the output.
# NOTE(review): this writes to "Tables/" while the main table writes to
# "tables/"; on a case-sensitive file system only one can be right.
stargazer(
  m1, m2, m3, m4, m5, m6, m7, m8, m9,
  type = "latex",
  title = "Relationship Between Name Fluency and Vote Share Moderated by Race",
  style = "ajps",
  font.size = "tiny",
  ci = FALSE,
  column.labels = c(
    "GC (white)",
    "PC (white)",
    "Local (white)",
    "GC (black)",
    "PC (black)",
    "Local (black)",
    "GC (hispanic)",
    "PC (hispanic)",
    "Local (hispanic)"
  ),
  star.cutoffs = c(.05, .01, .001),
  star.char = c("*", "**", "***"),
  notes = "Standard errors in parentheses and clustered by election. GC = general congressional elections. PC = primary congressional elections",
  keep = keep,
  covariate.labels = covariate.labels,
  add.lines = list(
    c("Year FE", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark"),
    c("State FE", "X", "X", "X", "X", "X", "X", "X", "X", "X"),
    c("County FE", "X", "X", "\\checkmark", "X", "X", "\\checkmark", "X", "X", "\\checkmark")
  ),
  align = FALSE,
  keep.stat = c("n", "adj.rsq"),
  out = "Tables/reg-main-by-race.tex",
  label = "tab:reg-main-by-race"
)

#### Plot results with different races ####
plotfun <- function(data, election, race_input) {
  # Turn a coefficient table into a four-row tibble for the by-race plot.
  # This definition replaces the earlier two-argument plotfun().
  #
  # data:       matrix-like object indexed as data[row, col]; column 1 is
  #             read as the estimate and column 2 as its standard error.
  # election:   label stored in the `context` column.
  # race_input: label stored in the `race` column (used for faceting).
  #
  # Rows 2-5 of `data` are taken, in this order, as surname pronounceability,
  # first-name pronounceability, surname commonality and first-name
  # commonality. NOTE(review): this relies on the regressor order of the
  # fitted model -- confirm against the formula.
  #
  # Returns a tibble with columns estimate, lowbound, highbound,
  # variablename, context and race; bounds are estimate -/+ 1.96 * SE.
  #
  # Fix: the labels used to read "Surname Surname Pronounceability" and
  # "First Name Surname Pronounceability" (a find/replace artefact that
  # ended up on the figure axis). They now match the main-findings plot.
  coef_rows <- 2:5
  est <- vapply(coef_rows, function(i) data[i, 1], numeric(1))
  se <- vapply(coef_rows, function(i) data[i, 2], numeric(1))

  tibble(
    estimate = est,
    lowbound = est - 1.96 * se,
    highbound = est + 1.96 * se,
    variablename = c(
      "Surname Pronounceability",
      "First Name Pronounceability",
      "Surname Commonality",
      "First Name Commonality"
    ),
    context = election,
    race = race_input
  )
}

# NOTE(review): these assignments reuse the names of the filtered data sets
# (local_white, ...), replacing them with plot rows. Re-running the model
# section after this point will therefore fail; the names are kept here in
# case later code expects them.
local_white <- plotfun(local_mod_white, "Local Elections", "White")
local_black <- plotfun(local_mod_black, "Local Elections", "Black")
local_hisp <- plotfun(local_mod_hisp, "Local Elections", "Hispanic")

primary_white <- plotfun(primary_mod_white, "Primary Elections", "White")
primary_black <- plotfun(primary_mod_black, "Primary Elections", "Black")
primary_hisp <- plotfun(primary_mod_hisp, "Primary Elections", "Hispanic")

general_white <- plotfun(general_mod_white, "General Elections", "White")
general_black <- plotfun(general_mod_black, "General Elections", "Black")
general_hisp <- plotfun(general_mod_hisp, "General Elections", "Hispanic")

# Stack all nine sets of rows, sort by election context, and add the
# "<context>, <variable>" key, the rounded estimate and a signed label.
plotdata <- rbind(
  local_white,
  local_black,
  local_hisp,
  primary_white,
  primary_black,
  primary_hisp,
  general_white,
  general_black,
  general_hisp
) %>%
  arrange(context) %>%
  mutate(concatname = paste(context, variablename, sep = ", ")) %>%
  mutate(estimate = round(estimate, 2)) %>%
  mutate(resultlabel = case_when(estimate >= 0 ~ paste0("+", estimate, "%"),
                                 estimate < 0 ~ paste0("", estimate, "%")))

# Level order fixes the y-axis order; the strings must match the labels
# produced by plotfun() above, otherwise the rows become NA.
plotdata$variablename <- factor(
  plotdata$variablename,
  levels = c(
    "First Name Commonality",
    "Surname Commonality",
    "First Name Pronounceability",
    "Surname Pronounceability"
  )
)

# Level order fixes the legend order and the grey assigned to each election.
plotdata$context <- factor(
  plotdata$context,
  levels = c("General Elections", "Primary Elections", "Local Elections")
)

# Shared dodge so points and their intervals are offset together.
pd <- position_dodge(width = 0.5)

# One facet per race, stacked vertically. The plot is not assigned;
# ggsave() below picks up the most recently created plot.
ggplot(plotdata,
       aes(x = estimate,
           y = variablename,
           color = context)) +
  geom_point(position = pd) +
  geom_errorbarh(aes(xmax = highbound,
                     xmin = lowbound),
                 height = .2,
                 width = .2,
                 position = pd) +
  scale_color_manual(values = c("#333333",
                                "#808080",
                                "#CCCCCC")) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  xlab("Change in vote share per 1 SD increase in name fluency") +
  ylab("") +
  facet_wrap(~ race, dir = "v") +
  theme_classic() +
  theme(text = element_text(family = "Times New Roman"),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
        legend.title = element_blank(),
        legend.position = "bottom",
        axis.title.x = element_text(family = "Times New Roman"))

ggsave("meet-journal-requirements/figures/reg-main-race-plot.pdf", width = 6, height = 4, units = "in", dpi = 600, device = cairo_pdf)


#### Z scores for last_algorithm variable ####
# Standardise last_algorithm within each data set (missing values are
# ignored when computing the mean and standard deviation).
ced$zscore <- with(
  ced,
  (last_algorithm - mean(last_algorithm, na.rm = TRUE)) /
    sd(last_algorithm, na.rm = TRUE)
)

primary$zscore <- with(
  primary,
  (last_algorithm - mean(last_algorithm, na.rm = TRUE)) /
    sd(last_algorithm, na.rm = TRUE)
)

general$zscore <- with(
  general,
  (last_algorithm - mean(last_algorithm, na.rm = TRUE)) /
    sd(last_algorithm, na.rm = TRUE)
)

# 1st / 25th / 50th / 75th / 99th percentile of the z-scores in `ced`.
# NOTE(review): these quantiles come from `ced`, while the example names
# below are drawn from `primary` using hard-coded z-score windows -- confirm
# that the windows correspond to the intended percentiles.
quantiles <- tibble(
  quantiles = quantile(
    ced$zscore,
    na.rm = TRUE,
    probs = c(.01, .25, .5, .75, .99)
  )
)

# Up to ten distinct (name, rating, z-score) rows are sampled per window.
# The seed is set once and the five draws must stay in this order to
# reproduce the same sample.
set.seed(14853)
z1 <- primary %>%
  filter(zscore > -2.41, zscore < -2.39) %>%
  select(lname, last_algorithm, zscore) %>%
  distinct() %>%
  slice_sample(n = 10) %>%
  mutate(percentile = "1st percentile (least fluent)")

z25 <- primary %>%
  filter(zscore > -.319, zscore < -.299) %>%
  select(lname, last_algorithm, zscore) %>%
  distinct() %>%
  slice_sample(n = 10) %>%
  mutate(percentile = "25th percentile")

z50 <- primary %>%
  filter(zscore > .512, zscore < .532) %>%
  select(lname, last_algorithm, zscore) %>%
  distinct() %>%
  slice_sample(n = 10) %>%
  mutate(percentile = "50th percentile")

z75 <- primary %>%
  filter(zscore > .678, zscore < .698) %>%
  select(lname, last_algorithm, zscore) %>%
  distinct() %>%
  slice_sample(n = 10) %>%
  mutate(percentile = "75th percentile")

z99 <- primary %>%
  filter(zscore > .75, zscore < .78) %>%
  select(lname, last_algorithm, zscore) %>%
  distinct() %>%
  slice_sample(n = 10) %>%
  mutate(percentile = "99th percentile (most fluent)")

# Combine the five samples, order by rating then name, round for display
# and give the columns their table headings.
zs <- rbind(z1, z25, z50, z75, z99) %>%
  arrange(last_algorithm, lname) %>%
  mutate(
    zscore = round(zscore, 2),
    last_algorithm = round(last_algorithm, 2)
  ) %>%
  rename(
    `Last Name` = lname,
    `Algorithm Rating` = last_algorithm,
    `Z-score` = zscore,
    Percentile = percentile
  )

# Print the data frame as-is (summary = FALSE) rather than summary statistics.
stargazer(
  zs,
  type = "latex",
  summary = FALSE,
  style = "ajps",
  font.size = "footnotesize",
  title = "Examples of Algorithmic Name Ratings at Different Percentiles",
  label = "tab:name-examples",
  notes = "Notes: Based on  primary congressional candidates' names",
  rownames = FALSE,
  out = "Tables/algorithm-name-examples.tex"
)








